Source of Data

More data
- US 1993 life table for the same age group
us.93 <- data.frame(x = graunt$x, lx.93 = c(100, 99, 99, 98, 97, 95, 92, 84, 70))
Into one single data frame
- Combine two data frames into one single data frame
(graunt.us <- data.frame(graunt, lx.93 = us.93$lx))
## x lx.17th lx.93
## 1 0 100 100
## 2 6 64 99
## 3 16 40 99
## 4 26 25 98
## 5 36 16 97
## 6 46 10 95
## 7 56 6 92
## 8 66 3 84
## 9 76 1 70
Life Expectancy
- The basic principle is that the area under the survival function is the life expectancy.
- \(X \ge 0\), \(X \sim F(x)\) => \(X \equiv F^{-1}(U), U \sim U(0,1)\). therefore,
- \(E(X) = E\{F^{-1}(U)\} = \int_{0}^{1} F^{-1}(u)du = \int_0^{\infty} 1-F(x) dx = \int_{0}^{\infty} S(x) dx\)
Step by step approach to draw survival curve
1. Basic plot with points and lines, compare the following threes methods
par(mfrow = c(2, 2))
plot(graunt)
plot(lx.17th ~ x, data = graunt)
plot(graunt)
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")

2. Denote the ages and observed survival rates on the axes
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)

3. Denote the age 0 and 76 by dotted lines
plot(graunt, ann=F, xaxt="n", yaxt="n", type = "b")
axis(side = 1, at=graunt$x, labels=graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)

Setting up coordinates for polygon() (Clockwise)
(graunt.x <- c(graunt$x, 0))
## [1] 0 6 16 26 36 46 56 66 76 0
(graunt.y <- c(graunt$lx.17th, 0))
## [1] 100 64 40 25 16 10 6 3 1 0
4. Shading
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
polygon(graunt.x, graunt.y, density = 15, angle = 135)

5. Grids
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
polygon(graunt.x, graunt.y, density = 15)
abline(v = graunt$x, lty = 2)

6. Title, x-axis label, and y-axis label
par(family = "MS Gothic")
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
polygon(graunt.x, graunt.y, density = 15)
abline(v = graunt$x, lty = 2)
main.title <- "Graunt's Life Distribution"
x.lab <- "Age (years)"
y.lab <- "Survival Rates (%)"
title(main = main.title, xlab = x.lab, ylab = y.lab)

Area under the curve
- The area under the curve can be approximated by the sum of the areas of trapezoids, therefore the area is
- \(\sum_{i=1}^{n-1} (x_{i+1}-x_i)\times\frac{1}{2}(y_i + y_{i+1})\).
diff(), head(), and tail() can be used to write a function to compute the area easily.
area.R <- function(x, y) {
sum(diff(x) * (head(y, -1) + tail(y, -1))/2)
}
area.R(graunt$x, graunt$lx.17th)/100
## [1] 18.17
Comparison with US 1993 life table
- The shaded area between the survival functions of Graunt’s and US 1993 represents the difference of life expectancies.
- Draw Graunt’s first with axes, lower and upper limits
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v=c(0, 76), lty = 2)
abline(v = c(0, 76), lty = 2)

2. Add US 1993 survival function
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")

3. Actually, US 1993 life table is truncated at the age 76. Specify that point.
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)

4. Using `las = 1` to specify 70%.
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)
axis(side = 2, at = 70, labels = 70, las = 1)

Setting coordinates for polygon()
us.graunt.x <- c(us.93$x, rev(graunt$x))
us.graunt.y <- c(us.93$lx.93, rev(graunt$lx.17th))
5. Shading
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)
axis(side = 2, at = 70, labels = 70, las = 1)
polygon(us.graunt.x, us.graunt.y, density = 15, col = "red", border = NA)

6. Grids
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)
axis(side = 2, at = 70, labels = 70, las = 1)
polygon(us.graunt.x, us.graunt.y, density = 15, col = "red", border = NA)
abline(v = graunt$x, lty = 2)

7. Title, x-axis and y-axis labels
plot(graunt, ann = FALSE, xaxt = "n", yaxt = "n", type = "b")
axis(side = 1, at = graunt$x, labels = graunt$x)
axis(side = 2, at = graunt$lx, labels = graunt$lx.17th)
abline(v = c(0, 76), lty = 2)
lines(us.93$x, us.93$lx.93, type = "b")
abline(h = 70, lty = 2)
axis(side = 2, at = 70, labels = 70, las = 1)
polygon(us.graunt.x, us.graunt.y, density = 15, col = "red", border = NA)
abline(v = graunt$x, lty = 2)
main.title.g.us <- "Survival Function of Graunt and US 1993"
title(main = main.title.g.us, xlab = x.lab, ylab = y.lab)

dev.copy(device = png, file = "../pics/graunt_us93.png")
## quartz_off_screen
## 3
Life expectancy
- The area under the US 1993 survival function is
area.R(us.93$x, us.93$lx.93)/100
## [1] 70.92
- The area of shaded region is
area.R(us.93$x, us.93$lx.93)/100 - area.R(graunt$x, graunt$lx.17th)/100
## [1] 52.75
Comparison with Halley’s life table
Halley’s life table
age <- 0:84
lx <- c(1238, 1000, 855, 798, 760, 732, 710, 692, 680, 670, 661, 653, 646, 640, 634, 628, 622, 616, 610, 604, 598, 592, 586, 579, 573, 567, 560, 553, 546, 539, 531, 523, 515, 507, 499, 490, 481, 472, 463, 454, 445, 436, 427, 417, 407, 397, 387, 377, 367, 357, 346, 335, 324, 313, 302, 292, 282, 272, 262, 252, 242, 232, 222, 212, 202, 192, 182, 172, 162, 152, 142, 131, 120, 109, 98, 88, 78, 68, 58, 50, 41, 34, 28, 23, 20)
length(lx)
## [1] 85
halley <- data.frame(age, lx)
halley$px <- round(halley$lx/1238*100, digits = 1)
halley
## age lx px
## 1 0 1238 100.0
## 2 1 1000 80.8
## 3 2 855 69.1
## 4 3 798 64.5
## 5 4 760 61.4
## 6 5 732 59.1
## 7 6 710 57.4
## 8 7 692 55.9
## 9 8 680 54.9
## 10 9 670 54.1
## 11 10 661 53.4
## 12 11 653 52.7
## 13 12 646 52.2
## 14 13 640 51.7
## 15 14 634 51.2
## 16 15 628 50.7
## 17 16 622 50.2
## 18 17 616 49.8
## 19 18 610 49.3
## 20 19 604 48.8
## 21 20 598 48.3
## 22 21 592 47.8
## 23 22 586 47.3
## 24 23 579 46.8
## 25 24 573 46.3
## 26 25 567 45.8
## 27 26 560 45.2
## 28 27 553 44.7
## 29 28 546 44.1
## 30 29 539 43.5
## 31 30 531 42.9
## 32 31 523 42.2
## 33 32 515 41.6
## 34 33 507 41.0
## 35 34 499 40.3
## 36 35 490 39.6
## 37 36 481 38.9
## 38 37 472 38.1
## 39 38 463 37.4
## 40 39 454 36.7
## 41 40 445 35.9
## 42 41 436 35.2
## 43 42 427 34.5
## 44 43 417 33.7
## 45 44 407 32.9
## 46 45 397 32.1
## 47 46 387 31.3
## 48 47 377 30.5
## 49 48 367 29.6
## 50 49 357 28.8
## 51 50 346 27.9
## 52 51 335 27.1
## 53 52 324 26.2
## 54 53 313 25.3
## 55 54 302 24.4
## 56 55 292 23.6
## 57 56 282 22.8
## 58 57 272 22.0
## 59 58 262 21.2
## 60 59 252 20.4
## 61 60 242 19.5
## 62 61 232 18.7
## 63 62 222 17.9
## 64 63 212 17.1
## 65 64 202 16.3
## 66 65 192 15.5
## 67 66 182 14.7
## 68 67 172 13.9
## 69 68 162 13.1
## 70 69 152 12.3
## 71 70 142 11.5
## 72 71 131 10.6
## 73 72 120 9.7
## 74 73 109 8.8
## 75 74 98 7.9
## 76 75 88 7.1
## 77 76 78 6.3
## 78 77 68 5.5
## 79 78 58 4.7
## 80 79 50 4.0
## 81 80 41 3.3
## 82 81 34 2.7
## 83 82 28 2.3
## 84 83 23 1.9
## 85 84 20 1.6
R base graphics
- To make the comparison easy, plot the points at the same age group of Graunt’s, 0, 6, 16, 26, 36, 46, 56, 66, 76. Step by step approach
- Halley’s survival function first
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")

2. Mark the points at 0, 6, 16, 26, 36, 46, 56, 66, 76 on Halley's survival function.
age.graunt <- age %in% graunt$x
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px[age.graunt] ~ age[age.graunt], data = halley)

Using subset()
halley.graunt <- subset(halley, age.graunt)
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)

3. Add Graunt's survival function
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")

4. x-axis label and y-axis label with `las = 1`
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)

5. Vertical dotted lines at the ages 0, 76, and 84
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v = c(0, 76, 84), lty = 2)

6. Specify the developers at proper coordinates with `text()`
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v = c(0, 76, 84), lty = 2)
text(x = c(16, 36), y = c(20, 50), label = c("Graunt", "Halley"))

6. Main title, x-axis label, and y-axis label
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v = c(0, 76, 84), lty = 2)
text(x = c(16, 36), y = c(20, 50), label = c("Graunt", "Halley"))
main.title.2 <- "Survival Function of Graunt and Halley"
title(main = main.title.2, xlab = x.lab, ylab = y.lab)

Polygon
- Setting the coordinates for
polygon(). The intersection is found at x = 10.8, y = 52.8 with locator(1) and couple of trial and errors
poly.1.x <- c(graunt$x[1:2], 10.8, halley$age[11:1])
poly.1.y <- c(graunt$lx.17th[1:2], 52.8, halley$px[11:1])
* Lower region
poly.2.x <- c(10.8, halley$age[12:85], graunt$x[9:3])
poly.2.y <- c(52.8, halley$px[12:85], graunt$lx.17th[9:3])
7. Shading upper region first
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v=c(0, 76, 84), lty = 2)
text(x = c(16, 36), y = c(20, 50), label = c("Graunt", "Halley"))
title(main = main.title.2, xlab = x.lab, ylab = y.lab)
polygon(poly.1.x, poly.1.y, angle = 45, density = 15, col = "blue")

8. Shading lower region next
plot(px ~ age, data = halley, ann = FALSE, xaxt = "n", yaxt = "n", type = "l")
points(px ~ age, data = halley.graunt)
lines(lx.17th ~ x, data = graunt, type = "b")
axis(side = 1, at = c(graunt$x, 84), labels = c(graunt$x, 84))
axis(side = 2, at = graunt$lx.17th, labels = graunt$lx.17th, las = 1)
abline(v=c(0, 76, 84), lty = 2)
text(x = c(16, 36), y = c(20, 50), label = c("Graunt", "Halley"))
title(main = main.title.2, xlab = x.lab, ylab = y.lab)
polygon(poly.1.x, poly.1.y, angle = 45, density = 15, col = "blue")
polygon(poly.2.x, poly.2.y, angle = 45, density = 15, col = "red")

dev.copy(device = png, file = "../pics/graunt_halley.png")
## quartz_off_screen
## 4
Life expectancy
- Compute the difference of life expectancies
(life.exp.halley <- area.R(halley$age, halley$px)/100)
## [1] 27.872
(life.exp.graunt <- area.R(graunt$x, graunt$lx.17th)/100)
## [1] 18.17
ggplot
library(ggplot2)
Data Reshape
- Attach
reshape2 package to change wide format to long format
library(reshape2)
graunt.us.melt <- melt(graunt.us, id.vars = "x", measure.vars = c("lx.17th", "lx.93"), value.name = "lx", variable.name = "times")
graunt.us.melt
## x times lx
## 1 0 lx.17th 100
## 2 6 lx.17th 64
## 3 16 lx.17th 40
## 4 26 lx.17th 25
## 5 36 lx.17th 16
## 6 46 lx.17th 10
## 7 56 lx.17th 6
## 8 66 lx.17th 3
## 9 76 lx.17th 1
## 10 0 lx.93 100
## 11 6 lx.93 99
## 12 16 lx.93 99
## 13 26 lx.93 98
## 14 36 lx.93 97
## 15 46 lx.93 95
## 16 56 lx.93 92
## 17 66 lx.93 84
## 18 76 lx.93 70
str(graunt.us.melt)
## 'data.frame': 18 obs. of 3 variables:
## $ x : num 0 6 16 26 36 46 56 66 76 0 ...
## $ times: Factor w/ 2 levels "lx.17th","lx.93": 1 1 1 1 1 1 1 1 1 2 ...
## $ lx : num 100 64 40 25 16 10 6 3 1 100 ...
- Change factor levels of
times
levels(graunt.us.melt$times) <- c("17th", "1993")
str(graunt.us.melt)
## 'data.frame': 18 obs. of 3 variables:
## $ x : num 0 6 16 26 36 46 56 66 76 0 ...
## $ times: Factor w/ 2 levels "17th","1993": 1 1 1 1 1 1 1 1 1 2 ...
## $ lx : num 100 64 40 25 16 10 6 3 1 100 ...
Plot
Points and Lines
- Step by step approach to understand the grammar of ggplot
- We set
ggplot() to accept varying data.frame() and aes()in geom_polygon
g1 <- ggplot() + geom_point(data = graunt.us.melt, aes(x = x, y = lx, colour = times))
g1

g2 <- g1 + geom_line(data = graunt.us.melt, aes(x = x, y = lx, colour = times))
g2

g3 <- g2 + theme_bw()
g3

Polygon
- Reuse
us.graunt.x and us.graunt.y for polygon(). Note how to remove default legends.
p3 <- g3 + geom_polygon(data = data.frame(x = us.graunt.x, y = us.graunt.y), aes(x = x, y = y), alpha = 0.3, fill = "red")
p3

p4 <- p3 + guides(colour = "none")
p4

Change default annotations
Points and Lines
1. Change the x-axis and y-axis labels
(g4 <- g3 + xlab(x.lab) + ylab(y.lab))

2. Main title
(g4 <- g3 + xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.g.us))

3. Change legend title
(g4 <- g3 +
xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.g.us) +
labs(colour = "Era"))

4. Change legends.
(g4 <- g3 +
xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.g.us) +
labs(colour = "Era") +
scale_colour_discrete(labels = c("Graunt Era", "US 1993")))

- Place legends inside the plot
(g5 <- g4 + theme(legend.position = c(0.8, 0.5)))

- Change x-axis and y-axis tick marks
(g6 <- g5 + scale_x_continuous(breaks = graunt$x) + scale_y_continuous(breaks = graunt$lx.17th))

ggsave("../pics/graunt_us_plot.png", g6)
## Saving 6 x 6 in image
Polygon
- Add information to the plot drawn with
polygon()
- Start with p4
p4

2. Main title, x-axis and y-axis labels
(p5 <- p4 +
xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.g.us))

3. "Graunt Era", "US 1993", "Difference of Life Expectancies" at the proper position
(p6 <- p5 + annotate("text", x = c(20, 40, 70), y = c(20, 60, 90), label=c("Graunt Era", "Difference of\nLife Expectancies", "US 1993")))

4. x-axis and y-axis tick marks
(p7 <- p6 + scale_x_continuous(breaks = graunt$x) + scale_y_continuous(breaks = graunt$lx.17th))

ggsave("../pics/graunt_us_poly.png", p7)
## Saving 6 x 6 in image
Graunt and Halley
Data Reshaping
graunt.2 <- graunt
names(graunt.2) <- c("x", "Graunt")
halley.2 <- halley[-2]
names(halley.2) <- c("x", "Halley")
graunt.halley.melt <- melt(list(graunt.2, halley.2), id.vars = "x", value.name = "lx", variable.name = "Who")
str(graunt.halley.melt)
## 'data.frame': 94 obs. of 4 variables:
## $ x : num 0 6 16 26 36 46 56 66 76 0 ...
## $ Who: Factor w/ 2 levels "Graunt","Halley": 1 1 1 1 1 1 1 1 1 2 ...
## $ lx : num 100 64 40 25 16 10 6 3 1 100 ...
## $ L1 : int 1 1 1 1 1 1 1 1 1 2 ...
graunt.halley.melt.g <- subset(graunt.halley.melt, graunt.halley.melt$x %in% graunt$x)
Survival Function, Step by Step
gh1 <- ggplot() + geom_line(data = graunt.halley.melt, aes(x = x, y = lx, colour = Who))
gh1

gh2 <- gh1 + geom_point(data = graunt.halley.melt.g, aes(x = x, y = lx, colour = Who))
gh2

gh3 <- gh2 +
theme_bw() +
xlab(x.lab) + ylab(y.lab) +
ggtitle(main.title.2)
gh3

gh4 <- gh3 +
theme(legend.position = c(0.8, 0.8)) +
annotate("text", x = c(16, 36), y = c(20, 50), label=c("Graunt", "Halley"))
gh4

ggsave("../pics/graunt_halley_ggplot.png", gh4)
## Saving 6 x 6 in image
Polygon
ghp4 <- gh4 + geom_polygon(data = data.frame(x = poly.1.x, y = poly.1.y), aes(x = x, y = y), alpha = 0.3, fill = "blue")
ghp4

ghp5 <- ghp4 + geom_polygon(data = data.frame(x = poly.2.x, y = poly.2.y), aes(x = x, y = y), alpha = 0.3, fill = "red")
ghp5

ggsave("../pics/graunt_halley_poly_ggplot.png", ghp5)
## Saving 6 x 6 in image
dump() and source()
- Check out how to save and retreve. Use
source() for retrieval
dump("area.R", file = "area.R")
save.image("graunt_halley_160329.rda")